000 - Export anonymized data



In [1]:

    
# -*- coding: UTF-8 -*-

import pandas as pd
import numpy as np



In [2]:

    
# Read the source CSV (UTF-8 encoded) into a DataFrame
data = pd.read_csv(
    filepath_or_buffer="data/results-makers-40.csv",
    encoding="utf-8",
)



In [3]:

    
# Check data
#data # Displays the whole table; use data.head() to preview only the first rows



In [4]:

    
# Collect the column labels of the loaded data into a plain Python list
columns = [label for label in data.columns.values]



In [5]:

    
# Columns we do not want to export
del_columns = [u'Unnamed: 0',
               u'id',
               u'submitdate',
               u'lastpage',
               u'startlanguage',
               u'startdate',
               u'datestamp',
               u'ipaddr',
               u'Q002'
               ]

# Fail loudly if an expected column is absent from the loaded data, so that a
# renamed or missing column can never slip into the export unnoticed.
missing_columns = [name for name in del_columns if name not in data.columns]
if missing_columns:
    raise ValueError("Columns to drop not found in data: %r" % (missing_columns,))

# Build a filtered list instead of deleting from `columns` in place: the cell
# can then be re-run without raising on entries that were already removed.
columns = [name for name in columns if name not in del_columns]

# Debug
#print columns



In [6]:

    
# Keep only the exportable columns, then put the rows in a random order so the
# exported row order differs from the order in the source file.
# Technique from: http://stackoverflow.com/a/15772330/2237113
data_export = data[columns]
shuffled_labels = np.random.permutation(data_export.index)
sorted_data_export = data_export.reindex(shuffled_labels)

# Debug
#sorted_data_export



In [7]:

    
# Renumber the index, for more anonymization... for all the anonymized data
# except business models data.
# After the shuffle the index still carries the original row labels;
# reset_index(drop=True) replaces them with 0..n-1 in the current (shuffled)
# order and discards the old labels instead of keeping them as a column.
sorted_data_anonymized_final = sorted_data_export.reset_index(drop=True)

#Debug
#sorted_data_anonymized_final



In [8]:

    
# Write the anonymized dataset to disk as a UTF-8 encoded CSV file.
# to_csv writes the index by default, so the renumbered row labels end up as
# the first column of the file.
sorted_data_anonymized_final.to_csv(
    path_or_buf='data/makersinquiry-italy-2014.csv',
    encoding='utf-8',
)